set more 1
* manual installation of ztree2stata required: https://sites.google.com/view/takekan/research/ztree2stata
* ssc install fs
* ssc install sxpose

* All file names below are relative to the data folder next to this script's folder.
* A forward slash is used as directory separator because Stata accepts it on
* Windows, macOS and Unix alike, whereas a backslash only works on Windows.
cd "../data"

*************************
* GENERATE MAIN DATASET *
*************************

* Import and merge raw data
*==========================

* Step 1: for every z-Tree export, attach the quiz keys of the globals table to the subjects table
fs rawdata*.xls
foreach xls in `r(files)' {
	* the subjects table is parked on disk so that it can be the using file of the merge
	ztree2stata subjects using `xls', clear
	save `xls'_temp_subjects.dta, replace
	* from the globals table only the merge keys and the quiz_correct columns are needed
	ztree2stata globals using `xls', clear
	keep treatment Period quiz_correct*
	* shift Period by 5 in treatment 2 and by 10 in treatment 3
	* NOTE(review): presumably this matches the Period numbering of the subjects table - confirm
	replace Period=Period+5*(treatment==2)+10*(treatment==3)
	merge 1:m treatment Period using `xls'_temp_subjects.dta
	save `xls'_temp.dta, replace
}

* Step 2: stack the merged sessions
clear all
fs *xls_temp.dta
foreach merged in `r(files)' {
	append using `merged'
}
save allxls_temp.dta, replace

* Step 3: questionnaire files, transposed by sxpose with the first row used as variable names
clear all
fs *.sbj
foreach sbj in `r(files)' {
	import delimited `sbj', clear
	sxpose, clear firstnames
	* the session identifier is read from characters 9 to 19 of the file name
	gen session=substr("`sbj'",9,11)
	save `sbj'_temp_quest.dta, replace
}
clear all
fs *_temp_quest.dta
foreach quest in `r(files)' {
	append using `quest'
}

* Step 4: keep the first two characters of each answer so that destring can convert it
foreach item of varlist risk altruism1 altruism2 negrecip1 negrecip2 negrecip3 posrecip1 posrecip2 {
	replace `item'=substr(`item',1,2)
}
destring *, replace

* Step 5: combine questionnaire and experimental data, then remove every temporary file
merge 1:m session Subject using allxls_temp.dta, nogen
fs *_temp*.dta
foreach leftover in `r(files)' {
	erase `leftover'
}

* Clean raw data
*===============

* lower-case every variable name, then switch to the names used in the rest of the script
rename *, lower
rename (session treatment) (datetime part)
* -99 is the placeholder that later sections turn into missing
replace gstar_opponent=-99 if part==3
rename (transfer_team transfer_opponent) (amount_team amount_opponent)

* Generate identifiers
*=====================

* treatment is inferred from the session time stamp; sessions not listed below are Structured
gen treatment_id=2 // Structured
replace treatment_id=0 if inlist(datetime,"180523_0951","180611_1056","180620_1436") // Baseline
replace treatment_id=1 if inlist(datetime,"180521_1433","180523_1411","180524_0919") // Decentralized
replace treatment_id=3 if datetime=="180611_1446" // Decentralized with N=2 only
sort treatment_id datetime group team subject period

* numeric identifiers that are unique across sessions
foreach unit in subject team {
	egen `unit'_id=group(datetime `unit')
}
egen economy_id=group(datetime group)

* economy type from the sum of the two ideal points: 8 -> 1, 12 -> 2, 16 -> 3
gen economy_type=.
local type=0
foreach total of numlist 8 12 16 {
	local ++type
	replace economy_type=`type' if gstar+gstar_opponent==`total'
}
* rows whose opponent ideal point carries the -99 placeholder get type 0
replace economy_type=0 if gstar_opponent==-99
* in the N=2 only sessions every period of a team inherits the type of its first period
bysort team_id (period): replace economy_type=economy_type[1] if treatment_id==3

* Generate/clean other useful variables
*======================================

* Indicator of whether the team has the highest ideal point in the economy
gen high_gstar=gstar==10 | (gstar==6 & gstar_opponent==2)

* Indicator of whether the team made an offer
* (provisional: the side-payment section further down resets offer_made and rebuilds it from request_team)
gen offer_made=0 if treatment_id>0 & amount_team==-99
replace offer_made=1 if treatment_id>0 & amount_team!=-99
replace offer_made=0 if treatment_id>0 & sign_team==-99 & request_team==-99

* Compute total earnings in an economy in a given period (total surplus)
* total() is the documented name of the egen function formerly called sum()
bysort economy_id period: egen earnings_tot=total(earnings_own)
* NOTE(review): halved presumably because both members of a team carry the same earnings_own - confirm
replace earnings_tot=earnings_tot/2

* Gini index on cumulative earnings (end of each part)
* Final income per team per part: cumulative earnings in periods 5, 10 and 25
gen finc=earnings_own_cum if period==5 | period==10 | period==25
* keep the value on the first row of each team-period cell only
bysort team_id period (subject_id): replace finc=. if _n==2
* Total final income per group per part
bysort economy_id period: egen cum_finc=total(finc)
bysort team_id period (subject_id): replace cum_finc=. if _n==2
* Number of teams in a group: 2, or 6 in part 3
gen grp_n=2 
replace grp_n=6 if part==3
bysort team_id period (subject_id): replace grp_n=. if _n==2
* Ranking of team earnings within group
* finc is an explicit within-cell sort key, so _n is the ascending earnings rank no matter
* how Stata happens to order rows inside an economy-period cell (missing finc sorts last)
bysort economy_id period (finc): gen rank=_n if finc!=.
* Mean final income of the teams in the group
gen meanfinc=cum_finc/grp_n
* Rank-weighted deviations from the mean, summed within economy and part
gen fincsum_a=rank*(finc - meanfinc)
bysort economy_id part: egen fincsum=total(fincsum_a) if finc!=.
* Gini = 2 * sum(rank * (finc - mean)) / (n^2 * mean)
gen gini=(2/(((grp_n)^2)*meanfinc))*(fincsum)

* Earnings in Euro: copy the value of each subject's last period to all of their rows
bysort subject_id (period): replace euros=euros[_N]

*Mistakes in the quiz
* In Baseline sessions (treatment_id==0) the content of slots 11-13 is copied to slots 15-17;
* slots 11 and 12 are then blanked, slot 13 is left untouched
forvalues from=11/13 {
	local to=`from'+4
	foreach stub in quiz quiz_correct {
		replace `stub'`to'=`stub'`from' if treatment_id==0
		if `from'<13 {
			replace `stub'`from'=. if treatment_id==0
		}
	}
}

* Spread each answer and its key over all rows of a subject.
* Each spec is "question range" followed by the row, in period order, that holds the value.
foreach spec in "1/10 1" "11/12 6" "15/17 11" {
	local items : word 1 of `spec'
	local row : word 2 of `spec'
	forvalues i=`items' {
		foreach stub in quiz quiz_correct {
			bysort subject_id (period): replace `stub'`i'=`stub'`i'[`row']
		}
	}
}

* Individual mistake flags: answer differs from the key (question 6 is not checked)
local checked 1 2 3 4 5 7 8 9 10 11 12 15 16 17
foreach q of local checked {
	gen bad_quiz`q'=(quiz`q'!=quiz_correct`q')
}

* Team level: keep the flag in period 1 only, sum it over the team, and mark a sum of 2
* (the varlist is expanded once, before any of the _team variables exist)
foreach flag of varlist bad_quiz* {
	replace `flag'=. if period>1
	bysort team_id: egen `flag'_team=total(`flag')
	gen `flag'_team_allwrong=`flag'_team==2 
}

* Number of questions per part with a team sum of 2
local part1 1 2 3 4 5 7 8 9 10
local part2 11 12
local part3 15 16 17
forvalues p=1/3 {
	local allwrong
	foreach q of local part`p' {
		local allwrong `allwrong' bad_quiz`q'_team_allwrong
	}
	egen mistakes_pt`p'=rowtotal(`allwrong')
}
drop *allwrong

* Initialize side-payment variables in Stata format
*==================================================

* values of -99 or below are placeholders: turn them into Stata missing values
foreach v of varlist offer_made transfers_sent transfers_received target_team request_team amount_team sign_team request_opponent amount_opponent sign_opponent {
	replace `v'=. if `v'<=-99
}

* Part 2: target_team is set whenever a request exists.
* Each rule reads: economy_type, own gstar, value assigned to target_team.
foreach rule in "1 2 3" "1 6 1" "2 2 5" "2 10 1" "3 6 5" "3 10 3" {
	local etype : word 1 of `rule'
	local own : word 2 of `rule'
	local target : word 3 of `rule'
	replace target_team=`target' if part==2 & economy_type==`etype' & gstar==`own' & request_team<. 
}
replace target_team=56 if treatment_id==2 & part==3 & request_team<. // each promise in Structured targets 5 and 6 jointly

* offer_made is rebuilt from scratch: 1 when a request exists, 0 when it is missing.
* It is defined for Decentralized, and in Structured only for teams with high_gstar==0.
replace offer_made=.
foreach who in "treatment_id==1" "treatment_id==2 & high_gstar==0" {
	replace offer_made=0 if `who' & request_team==.
	replace offer_made=1 if `who' & request_team<.
}

* in Structured, sent transfers are blanked for high_gstar teams and received transfers for the others
replace transfers_sent=. if treatment_id==2 & high_gstar==1 
replace transfers_received=. if treatment_id==2 & high_gstar==0

* offer_taken: 0 when nothing was sent, 1 when the amount sent equals the offered amount
* (the second statement wins when both conditions hold)
gen offer_taken=0 if treatment_id>0 & offer_made==1 & transfers_sent==0
replace offer_taken=1 if treatment_id>0 & offer_made==1 & transfers_sent==amount_team

* individual-level variables: suffix 1 and 2 get the same renaming scheme
forvalues j=1/2 {
	rename target_`j' target_own_`j'
	rename transfer_`j' amount_own_`j'
	rename transfer_partner_`j' amount_partner_`j'
	rename request_`j' request_own_`j'
	rename sign_`j' sign_own_`j'
}

*Label key variables and values
*==============================

* value labels share the name of the variable they describe
label define economy_type 1 "Economy ({it:L}, {it:M})" 2 "Economy ({it:L}, {it:H})" 3 "Economy ({it:M}, {it:H})" 0 "Economy (2{it:L}, 2{it:M}, 2{it:H})", replace
label define gstar 2 "G* = {it:L}" 6 "G* = {it:M}" 10 "G* = {it:H}", replace
label define treatment_id 0 "Baseline" 1 "Decentralized" 2 "Structured" 3 "Decentralized {it:N =} 2 only", replace
foreach v in economy_type gstar treatment_id {
	label values `v' `v'
}
label variable risk "Proneness to risk-taking"
label variable amount_opponent "Offered amount"
label variable request_opponent "Requested effort"

* Build the repetitive parts of the drop list:
* pairs holds one wildcard per ordered pair of distinct letters a-f, ideal holds g_l1 ... g_h2
local pairs
foreach a in a b c d e f {
	foreach b in a b c d e f {
		if "`a'"!="`b'" local pairs `pairs' *_`a'`b'
	}
}
local ideal
foreach lvl in l m h {
	local ideal `ideal' g_`lvl'1 g_`lvl'2
}
* Everything is dropped in one command, as in the original list.
* NOTE(review): the pattern ransfer* is reproduced exactly as found. It looks like a truncated
* transfer pattern, but transfers_sent and transfers_received are still needed by the order
* command below, so check the raw variable names before changing it.
drop _var1 client tables rnd* x* y* time* *max* *min* rank* calculator *profit* participate ///
	team_opponent *correct* _merge* *hyp* group team `ideal' `pairs' ///
	grp_n finc cum_finc meanfinc fincsum_a fincsum earnings_own earnings_own_cum g_opponent ///
	promise_dummy ransfer* no_promise_* field experbefore difficulty game quiz* bad_quiz*_team ///
	fulfill fulfill_opponent

* remaining placeholders (-99 or below) become missing in every variable except datetime and earnings_tot
ds datetime earnings_tot, not
foreach v in `r(varlist)' {
	replace `v'=. if `v'<=-99
}

sort treatment_id team_id period subject_id

* final column order
local traits risk altruism1 altruism2 negrecip1 negrecip2 negrecip3 posrecip1 posrecip2
local badquiz
foreach q of numlist 1/5 7/12 15/17 {
	local badquiz `badquiz' bad_quiz`q'
}
order treatment_id economy_id economy_type team_id subject_id gstar gstar_opponent high_gstar part period ///
	g_team g earnings_tot gini offer_made offer_taken transfers_sent transfers_received ///
	target_team amount_team request_team sign_team amount_opponent request_opponent sign_opponent ///
	g_*_1 target_*_1 amount_*_1 request_*_1 sign_*_1 g_*_2 target_*_2 amount_*_2 request_*_2 sign_*_2 ///
	`traits' `badquiz' mistakes_pt1 mistakes_pt2 mistakes_pt3 datetime subject euros

compress
save paper_data.dta, replace


********************************
* GENERATE PREDICTION DATASETS *
********************************

* For each economy type 1-3, build the grid of all (request_team, amount_team) pairs with
* request_team = 2,...,8 and amount_team = 1,...,80, and keep per request level the first
* amount at which the offer is flagged as profitable. The three results are stacked in
* predictions.dta. Intermediate results live in tempfiles, so nothing needs to be erased.
clear all
forvalues econ=1/3 {
	clear
	set obs 560
	* 7 blocks of 80 rows: amount_team runs 1,...,80 inside a block, request_team is 2 + block index
	gen amount_team=mod(_n-1,80)+1
	gen request_team=2+floor((_n-1)/80)
	* gstar_opponent is 6 in economy type 1 and 10 in economy types 2 and 3
	gen gstar_opponent=cond(`econ'==1,6,10)
	gen economy_type=`econ'
	* profitable: -4*request - 10*|gstar_opponent - request| + amount is at least -4*gstar_opponent
	gen profitable=-4*request_team-10*abs(gstar_opponent-request_team)+amount_team>=-4*gstar_opponent
	* when the request equals gstar_opponent the smallest amount on the grid is kept
	bysort request_team (amount_team): gen first_profitable=1 if request_team==gstar_opponent & _n==1
	* otherwise keep the amount at which profitable switches from 0 to 1
	bysort request_team (amount_team): replace first_profitable=1 if profitable==1 & profitable[_n-1]==0
	keep if first_profitable==1
	tempfile econtype`econ'
	save `econtype`econ'', replace
}

use `econtype1', clear
append using `econtype2'
append using `econtype3'
save predictions.dta, replace

*************************
* GENERATE CHAT DATASET *
*************************

* Import and merge raw data
* Each raw file is read as delimited text with variable names taken from line 28;
* only the rows whose contracts column reads "contracts" are kept.
clear 
fs rawdata*.xls
foreach xls in `r(files)' {
	import delimited `xls', varnames(28) clear 
	keep if contracts=="contracts"
	save `xls'_temp.dta, replace
}
clear all
fs *xls_temp.dta
foreach piece in `r(files)' {
	append using `piece', force
}
fs *_temp.dta
foreach piece in `r(files)' {
	erase `piece'
}

rename (timechattryoutchatmessage timeproduction1chatmessage v2) (timetryoutchat timeeffortchat part)

* Numeric period: blank out repeated header cells, convert, then add 5 in part 2 and 10 in part 3.
* The numeric copy takes over the position and the name of the string original.
gen period2=period
replace period2="" if period=="Period"
destring period2, replace
replace period2=period2+5*(part==2)+10*(part==3)
order period2, after(period)
drop period
rename period2 period

* Session stamp: taken from whichever of these columns is filled (a later column overrides an earlier one)
gen datetime=_1433 if _1433!=""
foreach stamp in _0927 _0946 _0947 _0951 _0919 _1056 _1411 _1428 _1436 _1446 _1605 {
	replace datetime=`stamp' if `stamp'!=""
}

* same session-to-treatment mapping as in the main dataset
gen treatment_id=2 // Structured
replace treatment_id=0 if inlist(datetime,"180523_0951","180611_1056","180620_1436") // Baseline
replace treatment_id=1 if inlist(datetime,"180521_1433","180523_1411","180524_0919") // Decentralized
replace treatment_id=3 if datetime=="180611_1446" // Decentralized with N=2 only

drop v* _* contracts
drop if period==.
replace timetryoutchat="" if timetryoutchat=="-"
destring, replace
drop if timetryoutchat<0 // drop chat tryout during instructions

* Split the time stamps into an effort-stage clock and an offer-stage clock
replace timeeffortchat=timetryoutchat if treatment_id==0 & part>1
gen timeofferchat=timetryoutchat if treatment_id>0 & part!=1
replace timeofferchat=. if timeofferchat==0 & timeeffortchat>0
replace timeeffortchat=. if timeofferchat>0 & timeeffortchat==0
drop timetryoutchat

* a message belongs to the effort stage when it has an effort-stage time stamp
gen effort_stage=timeeffortchat<.

* Common clock. The statements are order dependent: the fourth one blanks time for offer-stage
* rows after period 18 (their timeeffortchat is missing) and the fifth one fills them again.
gen time=timeeffortchat
replace time=timeeffortchat-60 if treatment_id==0 & period>18
replace time=timeofferchat if treatment_id>0 & effort_stage==0 
replace time=timeeffortchat-30 if treatment_id>0 & period>18
replace time=timeofferchat-60 if treatment_id>0 & period>18 & effort_stage==0
drop if time==. // drop one "ciao" in the chat tryout from a subject who typed before the timeout started

* attach the experimental data; rows found only in paper_data.dta come in without chat text
merge m:1 datetime subject period using paper_data.dta
drop if treatment_id==3

expand 2 if effort_stage==., gen(dupindicator) // create two stages for teams who didn't chat during both stages of a period
* the original row becomes the effort stage (1), its copy the offer stage (0)
replace effort_stage=1-dupindicator if effort_stage==.
sort treatment_id team_id subject_id period effort_stage 
drop if effort_stage==0 & part==1

* Helper variables marking the team member with the smallest subject_id.
* None of them is in the keep list below, so they do not reach chat.dta.
bysort subject_id period: gen id=subject_id if _n==1
bysort team_id: egen id_min=min(id)
gen sbj_a=(id_min==id) if id!=.
bysort subject_id: egen sbj=max(sbj_a)

drop if effort_stage==0 & part==1
drop *_a dupindicator
keep treatment_id economy_id team_id gstar high_gstar part period effort_stage time subject_id text 
order treatment_id economy_id team_id gstar part period effort_stage time subject_id text
gsort treatment_id economy_id team_id period effort_stage -time

save chat.dta, replace

********************************
* GENERATE CHAT CLASSIFICATION *
********************************

* Import and merge raw data
* Every coder workbook is read with its first row as variable names and tagged with its file name
clear 
fs *coder*.xls
foreach f in `r(files)' {
	import excel `f', firstrow clear
	gen source="`f'"
	destring *, replace
	save `f'_temp.dta, replace
}
clear all
fs *coder*xls_temp.dta
foreach f in `r(files)' {
	append using `f'
}
fs *coder*xls_temp.dta
foreach f in `r(files)' {
	erase `f'
}

* same definition of high_gstar as in the main dataset; economy_type is a string in the coder files
gen high_gstar=0
replace high_gstar=1 if gstar==10
replace high_gstar=1 if gstar==6 & economy_type=="G*=2,G*=6"
* part from the period number: 1-5, 6-10, 11 and later
gen part=1
replace part=2 if period>5
replace part=3 if period>10
* coder_id is 1 for the three coder1 workbooks and 0 for every other workbook
gen coder_id=(source=="baseline_coder1.xls" | source=="decentralized_coder1.xls" | source=="structured_coder1.xls")
* numeric treatment_id numbered from 0 in the sort order of the original variable
* NOTE(review): the labels below assume that order is Baseline, Decentralized, Structured - confirm
rename treatment_id treatment
sort treatment
egen treatment_id=group(treatment)
replace treatment_id=treatment_id-1

* drop rows without any valid classification and set unticked categories to 0
destring difficult, replace
global cat "confused_us confused_them coordination difficult prosocial antisocial overconfident giveuppromises disagree"
egen rowtot=rowtotal($cat), missing
order rowtot
drop if rowtot<0 | rowtot==.
sort treatment_id team_id period coder_id
foreach v of varlist $cat{
	replace `v'=0 if `v'==.
}

gen stage2=stage=="production"

label define stage2 /// 
  0 "Offer stage" /// 
  1 "Effort stage", replace 
label value stage2 stage2
label define part /// 
  2 "Part 2 (N=2)" /// 
  3 "Part 3 (N=6)", replace 
label value part part
label define gstar /// 
  2 "G* = 2" /// 
  6 "G* = 6" /// 
  10 "G* = 10", replace 
label value gstar gstar
label define high_gstar /// 
  0 "Low G*" /// 
  1 "High G*", replace 
label value high_gstar high_gstar
* coder_id==1 marks the coder1 workbooks (see the gen statement above), so 1 must read "Coder 1";
* the stored values are unchanged, only the label text now matches them
label define coder_id /// 
  0 "Coder 2" /// 
  1 "Coder 1", replace 
label value coder_id coder_id
label define treatment_id /// 
  0 "Baseline" /// 
  1 "Decentralized" ///
  2 "Structured", replace 
label value treatment_id treatment_id

drop source stage treatment economy_type
rename stage2 effort_stage
sort coder_id treatment_id team_id period effort_stage
order coder_id treatment_id economy_id team_id gstar part period effort_stage

compress
save coders_output.dta, replace
